using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml.Presentation;
using DocumentFormat.OpenXml.Spreadsheet;
using NPOI.XSSF.UserModel;
using NPOI.HSSF.UserModel;
using NPOI.SS.UserModel;
using ExamPaperEditingSystem.Models;

namespace ExamPaperEditingSystem.Services
{
    /// <summary>
    /// Extracts plain text from supported files (.txt, .docx, .pptx, .xlsx, .xls),
    /// splits it into overlapping chunks and pairs every chunk with an embedding
    /// obtained from the injected <see cref="IEmbeddingService"/>.
    /// </summary>
    public class DocumentProcessorService : IDocumentProcessorService
    {
        // Extensions are compared case-insensitively so ".DOCX" and ".docx" are treated alike.
        private static readonly HashSet<string> SupportedExtensions =
            new HashSet<string>(StringComparer.OrdinalIgnoreCase) { ".txt", ".docx", ".pptx", ".xlsx", ".xls" };

        // A blank line (in either line-ending flavour) marks a paragraph boundary.
        private static readonly string[] ParagraphSeparators = { "\n\n", "\r\n\r\n" };

        // Western and CJK sentence-ending punctuation.
        private static readonly char[] SentenceTerminators = { '.', '!', '?', '。', '！', '？' };

        // Closing quotes/brackets that belong to the sentence they follow.
        private static readonly char[] ClosingPunctuation = { '"', '\'', ')', ']', '”', '’', '）', '」', '』', '】' };

        private readonly IEmbeddingService _embeddingService;

        /// <summary>
        /// Creates the service.
        /// </summary>
        /// <param name="embeddingService">Service used to compute the embedding of every chunk.</param>
        /// <exception cref="ArgumentNullException"><paramref name="embeddingService"/> is null.</exception>
        public DocumentProcessorService(IEmbeddingService embeddingService)
        {
            // Fail at construction time instead of with a NullReferenceException on the first chunk.
            _embeddingService = embeddingService ?? throw new ArgumentNullException(nameof(embeddingService));
        }

        /// <summary>
        /// Processes every supported file below <paramref name="directoryPath"/> (recursively).
        /// </summary>
        /// <param name="directoryPath">Root directory to scan.</param>
        /// <param name="knowledgeBaseId">Knowledge-base identifier stored on every produced document.</param>
        /// <returns>
        /// The documents of all files that could be processed; an empty list when the directory does not exist.
        /// A file that fails is reported on the console and skipped (best effort).
        /// </returns>
        public async Task<List<VectorDocument>> ProcessDocumentsAsync(string directoryPath, string knowledgeBaseId)
        {
            var documents = new List<VectorDocument>();

            if (!Directory.Exists(directoryPath))
            {
                return documents;
            }

            var files = Directory.GetFiles(directoryPath, "*.*", SearchOption.AllDirectories)
                .Where(IsSupportedFileType)
                .ToArray();

            foreach (var file in files)
            {
                try
                {
                    documents.AddRange(await ProcessFileAsync(file, knowledgeBaseId));
                }
                catch (Exception ex)
                {
                    // Deliberately best effort: one unreadable file must not abort the whole import.
                    Console.WriteLine($"处理文件 {file} 失败: {ex.Message}");
                }
            }

            return documents;
        }

        /// <summary>
        /// Extracts the text of one file, chunks it and builds one <see cref="VectorDocument"/> per non-blank chunk.
        /// </summary>
        /// <param name="filePath">Path of the file to process.</param>
        /// <param name="knowledgeBaseId">Knowledge-base identifier stored on every produced document.</param>
        /// <returns>The produced documents; empty when the file contains no text.</returns>
        /// <exception cref="InvalidOperationException">
        /// Any failure during extraction, chunking or embedding; the original exception is the inner exception.
        /// </exception>
        public async Task<List<VectorDocument>> ProcessFileAsync(string filePath, string knowledgeBaseId)
        {
            var documents = new List<VectorDocument>();

            try
            {
                var text = await ExtractTextFromFileAsync(filePath);
                if (string.IsNullOrWhiteSpace(text))
                {
                    return documents;
                }

                var chunks = SplitTextIntoChunks(text);
                if (chunks.Count == 0)
                {
                    return documents;
                }

                // File-level metadata is identical for every chunk, so read it once
                // instead of touching the file system per chunk.
                var fileName = Path.GetFileName(filePath);
                var fileExtension = Path.GetExtension(filePath);
                var fileSize = new FileInfo(filePath).Length;

                for (int i = 0; i < chunks.Count; i++)
                {
                    var chunk = chunks[i];
                    if (string.IsNullOrWhiteSpace(chunk))
                    {
                        continue;
                    }

                    var embedding = await _embeddingService.GetEmbeddingAsync(chunk);

                    documents.Add(new VectorDocument
                    {
                        Content = chunk,
                        Embedding = embedding,
                        SourceFile = fileName,
                        KnowledgeBase = knowledgeBaseId,
                        Metadata = new Dictionary<string, object>
                        {
                            ["file_path"] = filePath,
                            ["chunk_index"] = i,
                            ["total_chunks"] = chunks.Count,
                            ["file_extension"] = fileExtension,
                            ["file_size"] = fileSize
                        }
                    });
                }
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException($"处理文件 {filePath} 失败: {ex.Message}", ex);
            }

            return documents;
        }

        /// <summary>
        /// Extracts the plain text of a file, choosing the extractor by file extension (case-insensitive).
        /// </summary>
        /// <param name="filePath">Path of the file to read.</param>
        /// <returns>The extracted text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
        /// <exception cref="NotSupportedException">The extension is not one of the supported types.</exception>
        public async Task<string> ExtractTextFromFileAsync(string filePath)
        {
            if (filePath == null)
            {
                throw new ArgumentNullException(nameof(filePath));
            }

            var extension = Path.GetExtension(filePath).ToLowerInvariant();

            return extension switch
            {
                ".txt" => await ExtractTextFromTxtAsync(filePath),
                ".docx" => ExtractTextFromDocx(filePath),
                ".pptx" => ExtractTextFromPptx(filePath),
                ".xlsx" => ExtractTextFromXlsx(filePath),
                ".xls" => ExtractTextFromXls(filePath),
                _ => throw new NotSupportedException($"不支持的文件类型: {extension}")
            };
        }

        /// <summary>
        /// Splits text into chunks of at most <paramref name="maxChunkSize"/> characters.
        /// Text is cut at blank-line paragraph boundaries first; a paragraph that is too long is cut
        /// at sentence boundaries, and a sentence that is still too long is cut by length.
        /// Each chunk after the first starts with up to <paramref name="overlap"/> trailing characters
        /// of the previous chunk.
        /// </summary>
        /// <param name="text">Text to split; null or whitespace yields an empty list.</param>
        /// <param name="maxChunkSize">Upper bound for the length of a chunk; must be positive.</param>
        /// <param name="overlap">
        /// Desired number of carried-over characters. Negative values are treated as 0; the value is
        /// capped so that every chunk still fits <paramref name="maxChunkSize"/> and contains new text.
        /// </param>
        /// <returns>The trimmed, non-empty chunks in document order.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxChunkSize"/> is zero or negative.</exception>
        public List<string> SplitTextIntoChunks(string text, int maxChunkSize = 1000, int overlap = 200)
        {
            var chunks = new List<string>();

            if (string.IsNullOrWhiteSpace(text))
            {
                return chunks;
            }

            if (maxChunkSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(maxChunkSize), maxChunkSize, "maxChunkSize must be greater than zero.");
            }

            // An overlap as large as the chunk itself would leave no room for new text.
            var carryOver = Math.Max(0, Math.Min(overlap, maxChunkSize - 1));
            var current = new StringBuilder();

            foreach (var piece in EnumerateChunkPieces(text, maxChunkSize))
            {
                // The buffer is only ever flushed after at least one new piece was appended,
                // so a chunk never consists of carried-over text alone.
                if (current.Length > 0 && current.Length + piece.Length > maxChunkSize)
                {
                    var finished = current.ToString();
                    AddTrimmedChunk(chunks, finished);
                    current.Clear();

                    // Shrink the overlap when necessary so overlap + piece still fits the limit.
                    var room = Math.Min(carryOver, maxChunkSize - piece.Length);
                    current.Append(GetOverlapText(finished, room));
                }

                current.AppendLine(piece);
            }

            // Emit whatever is left in the buffer as the last chunk.
            if (current.Length > 0)
            {
                AddTrimmedChunk(chunks, current.ToString());
            }

            return chunks;
        }

        /// <summary>
        /// Tells whether the file's extension is one of the supported types (case-insensitive).
        /// </summary>
        /// <param name="filePath">File name or path; null, empty or extension-less values yield false.</param>
        /// <returns>True when the extension is supported.</returns>
        public bool IsSupportedFileType(string filePath)
        {
            if (string.IsNullOrWhiteSpace(filePath))
            {
                return false;
            }

            var extension = Path.GetExtension(filePath);
            return !string.IsNullOrEmpty(extension) && SupportedExtensions.Contains(extension);
        }

        /// <summary>Reads a text file as UTF-8.</summary>
        private async Task<string> ExtractTextFromTxtAsync(string filePath)
        {
            return await File.ReadAllTextAsync(filePath, Encoding.UTF8);
        }

        /// <summary>
        /// Reads a Word document and returns one line per paragraph, including paragraphs
        /// that live inside tables.
        /// </summary>
        private string ExtractTextFromDocx(string filePath)
        {
            var text = new StringBuilder();

            using var document = WordprocessingDocument.Open(filePath, false);
            var body = document.MainDocumentPart?.Document?.Body;

            if (body == null)
            {
                return string.Empty;
            }

            // Descendants (not Elements) so paragraphs nested in table cells are found too.
            foreach (var paragraph in body.Descendants<Paragraph>())
            {
                // A paragraph nested inside another paragraph (e.g. text-box content) is already
                // part of the outer paragraph's InnerText; skip it to avoid emitting it twice.
                if (paragraph.Ancestors<Paragraph>().Any())
                {
                    continue;
                }

                text.AppendLine(paragraph.InnerText);
            }

            return text.ToString();
        }

        /// <summary>
        /// Reads a PowerPoint presentation and returns one line per text paragraph,
        /// with a blank line after every slide.
        /// </summary>
        private string ExtractTextFromPptx(string filePath)
        {
            var text = new StringBuilder();

            using var presentation = PresentationDocument.Open(filePath, false);
            var presentationPart = presentation.PresentationPart;
            var slideIds = presentationPart?.Presentation?.SlideIdList?.Elements<SlideId>();

            if (presentationPart == null || slideIds == null)
            {
                return string.Empty;
            }

            foreach (var slideId in slideIds)
            {
                var relationshipId = slideId.RelationshipId?.Value;
                if (string.IsNullOrEmpty(relationshipId))
                {
                    continue;
                }

                if (!(presentationPart.GetPartById(relationshipId) is SlidePart slidePart) || slidePart.Slide == null)
                {
                    continue;
                }

                // Slide.InnerText would glue the text of all shapes together without any separator,
                // so emit the DrawingML paragraphs one per line instead.
                foreach (var paragraph in slidePart.Slide.Descendants<DocumentFormat.OpenXml.Drawing.Paragraph>())
                {
                    var paragraphText = paragraph.InnerText;
                    if (!string.IsNullOrWhiteSpace(paragraphText))
                    {
                        text.AppendLine(paragraphText);
                    }
                }

                // Blank line between slides so the chunker sees a paragraph boundary.
                text.AppendLine();
            }

            return text.ToString();
        }

        /// <summary>
        /// Reads an .xlsx workbook: one line per row, non-empty cell values each followed by a tab.
        /// </summary>
        private string ExtractTextFromXlsx(string filePath)
        {
            var text = new StringBuilder();

            using var document = SpreadsheetDocument.Open(filePath, false);
            var workbookPart = document.WorkbookPart;
            var sheets = workbookPart?.Workbook?.Sheets?.Elements<Sheet>();

            if (workbookPart == null || sheets == null)
            {
                return string.Empty;
            }

            // Resolve the shared string table once; looking it up per cell is a linear scan each time.
            var sharedStrings = LoadSharedStrings(workbookPart);

            foreach (var sheet in sheets)
            {
                var sheetId = sheet.Id?.Value;
                if (string.IsNullOrEmpty(sheetId))
                {
                    continue;
                }

                // Sheets that are not worksheets have no cell grid.
                if (!(workbookPart.GetPartById(sheetId) is WorksheetPart worksheetPart))
                {
                    continue;
                }

                var sheetData = worksheetPart.Worksheet?.Elements<SheetData>().FirstOrDefault();
                if (sheetData == null)
                {
                    continue;
                }

                foreach (var row in sheetData.Elements<Row>())
                {
                    foreach (var cell in row.Elements<Cell>())
                    {
                        var cellValue = GetCellValue(cell, sharedStrings);
                        if (!string.IsNullOrEmpty(cellValue))
                        {
                            text.Append(cellValue + "\t");
                        }
                    }

                    text.AppendLine();
                }
            }

            return text.ToString();
        }

        /// <summary>
        /// Reads a legacy .xls workbook: one line per row, non-empty cell values each followed by a tab.
        /// </summary>
        private string ExtractTextFromXls(string filePath)
        {
            var text = new StringBuilder();

            using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read);
            var workbook = new HSSFWorkbook(fileStream);

            for (int sheetIndex = 0; sheetIndex < workbook.NumberOfSheets; sheetIndex++)
            {
                var sheet = workbook.GetSheetAt(sheetIndex);
                foreach (IRow row in sheet)
                {
                    foreach (ICell cell in row.Cells)
                    {
                        var cellValue = cell.ToString();
                        if (!string.IsNullOrEmpty(cellValue))
                        {
                            text.Append(cellValue + "\t");
                        }
                    }

                    text.AppendLine();
                }
            }

            return text.ToString();
        }

        /// <summary>
        /// Returns the text of every shared-string item of the workbook, in table order;
        /// empty when the workbook has no shared string table.
        /// </summary>
        private static IReadOnlyList<string> LoadSharedStrings(WorkbookPart workbookPart)
        {
            var items = workbookPart.SharedStringTablePart?.SharedStringTable?
                .Elements<SharedStringItem>()
                .Select(item => item.InnerText)
                .ToList();

            return items ?? new List<string>();
        }

        /// <summary>
        /// Returns the text of an .xlsx cell, resolving shared-string and inline-string cells.
        /// </summary>
        /// <param name="cell">The cell to read.</param>
        /// <param name="sharedStrings">Texts of the workbook's shared string table, by index.</param>
        /// <returns>The cell text, or an empty string when the cell has no resolvable value.</returns>
        private static string GetCellValue(Cell cell, IReadOnlyList<string> sharedStrings)
        {
            var rawValue = cell.CellValue?.Text;

            if (cell.DataType != null)
            {
                // Inline strings keep their text in the <is> child instead of the <v> value.
                if (cell.DataType.Value == CellValues.InlineString)
                {
                    return cell.InlineString?.InnerText ?? string.Empty;
                }

                if (cell.DataType.Value == CellValues.SharedString)
                {
                    // The raw value is an index into the shared string table; a missing or
                    // out-of-range index yields no text rather than an exception.
                    if (int.TryParse(rawValue, out var index) && index >= 0 && index < sharedStrings.Count)
                    {
                        return sharedStrings[index];
                    }

                    return string.Empty;
                }
            }

            return rawValue ?? string.Empty;
        }

        /// <summary>
        /// Yields the pieces the chunker packs into chunks: whole paragraphs where they fit,
        /// otherwise sentences, otherwise fixed-length slices. Every piece is non-blank and
        /// no longer than <paramref name="maxChunkSize"/>.
        /// </summary>
        private static IEnumerable<string> EnumerateChunkPieces(string text, int maxChunkSize)
        {
            var paragraphs = text.Split(ParagraphSeparators, StringSplitOptions.RemoveEmptyEntries);

            foreach (var rawParagraph in paragraphs)
            {
                var paragraph = rawParagraph.Trim();
                if (paragraph.Length == 0)
                {
                    continue;
                }

                if (paragraph.Length <= maxChunkSize)
                {
                    yield return paragraph;
                    continue;
                }

                // The paragraph alone exceeds the limit: fall back to sentences.
                foreach (var sentence in SplitIntoSentences(paragraph))
                {
                    if (sentence.Length <= maxChunkSize)
                    {
                        yield return sentence;
                        continue;
                    }

                    // Text without usable sentence boundaries: cut by length as a last resort.
                    foreach (var slice in SplitByLength(sentence, maxChunkSize))
                    {
                        if (!string.IsNullOrWhiteSpace(slice))
                        {
                            yield return slice;
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Cuts text into consecutive slices of at most <paramref name="maxLength"/> characters,
        /// avoiding a cut between the two halves of a surrogate pair where possible.
        /// </summary>
        private static IEnumerable<string> SplitByLength(string text, int maxLength)
        {
            var start = 0;

            while (start < text.Length)
            {
                var length = Math.Min(maxLength, text.Length - start);
                var end = start + length;

                // Move the cut one position back when it would separate a surrogate pair
                // (not possible for a one-character slice, which must still make progress).
                if (end < text.Length && length > 1
                    && char.IsHighSurrogate(text[end - 1]) && char.IsLowSurrogate(text[end]))
                {
                    end--;
                }

                yield return text.Substring(start, end - start);
                start = end;
            }
        }

        /// <summary>
        /// Splits text into trimmed, non-empty sentences at Western and CJK sentence-ending punctuation.
        /// A run of terminators ("...", "?!") and directly following closing quotes/brackets stay with
        /// the sentence they end; a dot between two digits is treated as a decimal point.
        /// </summary>
        private static List<string> SplitIntoSentences(string text)
        {
            var sentences = new List<string>();
            var start = 0;
            var index = 0;

            while (index < text.Length)
            {
                if (!IsSentenceBoundary(text, index))
                {
                    index++;
                    continue;
                }

                index++;

                // Keep consecutive terminators and trailing closing punctuation in the same sentence.
                while (index < text.Length && Array.IndexOf(SentenceTerminators, text[index]) >= 0)
                {
                    index++;
                }

                while (index < text.Length && Array.IndexOf(ClosingPunctuation, text[index]) >= 0)
                {
                    index++;
                }

                AddTrimmedChunk(sentences, text.Substring(start, index - start));
                start = index;
            }

            // Whatever follows the last terminator is the final sentence.
            if (start < text.Length)
            {
                AddTrimmedChunk(sentences, text.Substring(start));
            }

            return sentences;
        }

        /// <summary>
        /// Tells whether the character at <paramref name="index"/> ends a sentence.
        /// </summary>
        private static bool IsSentenceBoundary(string text, int index)
        {
            var ch = text[index];
            if (Array.IndexOf(SentenceTerminators, ch) < 0)
            {
                return false;
            }

            // "3.14": a dot surrounded by digits is a decimal point, not a sentence end.
            var isDecimalPoint = ch == '.'
                && index > 0
                && index + 1 < text.Length
                && char.IsDigit(text[index - 1])
                && char.IsDigit(text[index + 1]);

            return !isDecimalPoint;
        }

        /// <summary>
        /// Returns the trailing <paramref name="overlapSize"/> characters of <paramref name="text"/>
        /// (the whole text when it is shorter, an empty string for a non-positive size).
        /// The result never starts with the second half of a surrogate pair.
        /// </summary>
        private static string GetOverlapText(string text, int overlapSize)
        {
            if (overlapSize <= 0)
            {
                return string.Empty;
            }

            if (text.Length <= overlapSize)
            {
                return text;
            }

            var start = text.Length - overlapSize;

            // start >= 1 here; skip a low surrogate whose high half would be cut off.
            if (char.IsLowSurrogate(text[start]) && char.IsHighSurrogate(text[start - 1]))
            {
                start++;
            }

            return text.Substring(start);
        }

        /// <summary>Adds the trimmed value to the list unless it is empty after trimming.</summary>
        private static void AddTrimmedChunk(List<string> target, string value)
        {
            var trimmed = value.Trim();
            if (trimmed.Length > 0)
            {
                target.Add(trimmed);
            }
        }
    }
}